{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# MLflow Training Tutorial\n",
    "\n",
    "This `train.pynb` Jupyter notebook predicts the quality of wine using [sklearn.linear_model.ElasticNet](http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html).  \n",
    "\n",
    "> This is the Jupyter notebook version of the `train.py` example\n",
    "\n",
    "Attribution\n",
    "* The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality\n",
    "* P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.\n",
    "* Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Wine Quality Sample\n",
    "def train(in_alpha, in_l1_ratio):\n",
    "    import os\n",
    "    import warnings\n",
    "    import sys\n",
    "\n",
    "    import pandas as pd\n",
    "    import numpy as np\n",
    "    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\n",
    "    from sklearn.model_selection import train_test_split\n",
    "    from sklearn.linear_model import ElasticNet\n",
    "\n",
    "    import mlflow\n",
    "    import mlflow.sklearn\n",
    "    \n",
    "    import logging\n",
    "    logging.basicConfig(level=logging.WARN)\n",
    "    logger = logging.getLogger(__name__)\n",
    "\n",
    "    def eval_metrics(actual, pred):\n",
    "        rmse = np.sqrt(mean_squared_error(actual, pred))\n",
    "        mae = mean_absolute_error(actual, pred)\n",
    "        r2 = r2_score(actual, pred)\n",
    "        return rmse, mae, r2\n",
    "\n",
    "\n",
    "    warnings.filterwarnings(\"ignore\")\n",
    "    np.random.seed(40)\n",
    "\n",
    "    # Read the wine-quality csv file from the URL\n",
    "    csv_url =\\\n",
    "        'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'\n",
    "    try:\n",
    "        data = pd.read_csv(csv_url, sep=';')\n",
    "    except Exception as e:\n",
    "        logger.exception(\n",
    "            \"Unable to download training & test CSV, check your internet connection. Error: %s\", e)\n",
    "\n",
    "    # Split the data into training and test sets. (0.75, 0.25) split.\n",
    "    train, test = train_test_split(data)\n",
    "\n",
    "    # The predicted column is \"quality\" which is a scalar from [3, 9]\n",
    "    train_x = train.drop([\"quality\"], axis=1)\n",
    "    test_x = test.drop([\"quality\"], axis=1)\n",
    "    train_y = train[[\"quality\"]]\n",
    "    test_y = test[[\"quality\"]]\n",
    "\n",
    "    # Set default values if no alpha is provided\n",
    "    if float(in_alpha) is None:\n",
    "        alpha = 0.5\n",
    "    else:\n",
    "        alpha = float(in_alpha)\n",
    "\n",
    "    # Set default values if no l1_ratio is provided\n",
    "    if float(in_l1_ratio) is None:\n",
    "        l1_ratio = 0.5\n",
    "    else:\n",
    "        l1_ratio = float(in_l1_ratio)\n",
    "\n",
    "    # Useful for multiple runs (only doing one run in this sample notebook)    \n",
    "    with mlflow.start_run():\n",
    "        # Execute ElasticNet\n",
    "        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)\n",
    "        lr.fit(train_x, train_y)\n",
    "\n",
    "        # Evaluate Metrics\n",
    "        predicted_qualities = lr.predict(test_x)\n",
    "        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)\n",
    "\n",
    "        # Print out metrics\n",
    "        print(\"Elasticnet model (alpha=%f, l1_ratio=%f):\" % (alpha, l1_ratio))\n",
    "        print(\"  RMSE: %s\" % rmse)\n",
    "        print(\"  MAE: %s\" % mae)\n",
    "        print(\"  R2: %s\" % r2)\n",
    "\n",
    "        # Log parameter, metrics, and model to MLflow\n",
    "        mlflow.log_param(\"alpha\", alpha)\n",
    "        mlflow.log_param(\"l1_ratio\", l1_ratio)\n",
    "        mlflow.log_metric(\"rmse\", rmse)\n",
    "        mlflow.log_metric(\"r2\", r2)\n",
    "        mlflow.log_metric(\"mae\", mae)\n",
    "\n",
    "        mlflow.sklearn.log_model(lr, \"model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elasticnet model (alpha=0.500000, l1_ratio=0.500000):\n",
      "  RMSE: 0.82224284975954\n",
      "  MAE: 0.6278761410160691\n",
      "  R2: 0.12678721972772689\n"
     ]
    }
   ],
   "source": [
    "train(0.5, 0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elasticnet model (alpha=0.200000, l1_ratio=0.200000):\n",
      "  RMSE: 0.7859129997062342\n",
      "  MAE: 0.6155290394093894\n",
      "  R2: 0.20224631822892092\n"
     ]
    }
   ],
   "source": [
    "train(0.2, 0.2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Elasticnet model (alpha=0.100000, l1_ratio=0.100000):\n",
      "  RMSE: 0.7792546522251949\n",
      "  MAE: 0.6112547988118587\n",
      "  R2: 0.2157063843066196\n"
     ]
    }
   ],
   "source": [
    "train(0.1, 0.1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
